(ns metabase.mbql.schema
  "Schema for validating a *normalized* MBQL query. This is also the definitive grammar for MBQL, wow!"
  (:refer-clojure
   :exclude
   [count distinct min max + - / * and or not = < > <= >= time case concat replace])
  (:require [clojure
             [core :as core]
             [set :as set]]
            [metabase.mbql.schema.helpers :refer [defclause is-clause? one-of]]
            [metabase.mbql.util.match :as match]
            [metabase.util.schema :as su]
            [schema.core :as s])
  (:import java.time.format.DateTimeFormatter))

;; A NOTE ABOUT METADATA:
;;
;; Clauses below are marked with the following tags for documentation purposes:
;;
;; *  Clauses marked `^:sugar` are syntactic sugar primarily intended to make generating queries easier on the
;;    frontend. These clauses are automatically rewritten as simpler clauses by the `desugar` or `expand-macros`
;;    middleware. Thus driver implementations do not need to handle these clauses.
;;
;; *  Clauses marked `^:internal` are automatically generated by `wrap-value-literals` or other middleware from values
;;    passed in. They are not intended to be used by the frontend when generating a query. These add certain
;;    information that simplifies driver implementations. When writing MBQL queries yourself you should pretend these
;;    clauses don't exist.
;;
;; *  Clauses marked `^{:requires-features #{feature+}}` require a certain set of features to be used. At some date in
;;    the future we will likely add middleware that uses this metadata to automatically validate that a driver has the
;;    features needed to run the query in question.

;;; +----------------------------------------------------------------------------------------------------------------+
;;; |                                                  MBQL Clauses                                                  |
;;; +----------------------------------------------------------------------------------------------------------------+

;;; ------------------------------------------------- Datetime Stuff -------------------------------------------------

(def DatetimeFieldUnit
  "Schema matching any keyword that is a valid datetime bucketing unit."
  (-> (s/enum
       :default
       ;; each plain unit is listed next to its `-of-` counterpart(s)
       :minute  :minute-of-hour
       :hour    :hour-of-day
       :day     :day-of-week :day-of-month :day-of-year
       :week    :week-of-year
       :month   :month-of-year
       :quarter :quarter-of-year
       :year)
      (s/named "datetime-unit")))

;; Units accepted by the `relative-datetime`, `interval` and `time-interval` clauses: only the plain units, none of
;; the `-of-` variants found in `DatetimeFieldUnit`.
(def ^:private RelativeDatetimeUnit
  (-> (s/enum :default :minute :hour :day :week :month :quarter :year)
      (s/named "relative-datetime-unit")))

(defn- can-parse-iso-8601?
  "Can `s` be parsed by `formatter`? Returns `true` when `formatter` parses `s`, `false` when parsing fails, and `nil`
  (also falsey) when `s` is not a string at all."
  [^DateTimeFormatter formatter, ^String s]
  (when (string? s)
    (try
      (.parse formatter s)
      true
      ;; `DateTimeFormatter.parse` reports every parse failure as a `DateTimeParseException`. Catching only that
      ;; (rather than `Throwable`) lets serious JVM errors such as `OutOfMemoryError` propagate instead of being
      ;; silently reported as "not parseable".
      (catch java.time.format.DateTimeParseException _
        false))))

;; ISO-8601 predicates: each one is `can-parse-iso-8601?` with one of the built-in `DateTimeFormatter`s already
;; applied, leaving only the candidate value `s` to be supplied (hence the explicit `:arglists`).
(def ^:private ^{:arglists '([s])} can-parse-date?     (partial can-parse-iso-8601? DateTimeFormatter/ISO_DATE))
(def ^:private ^{:arglists '([s])} can-parse-datetime? (partial can-parse-iso-8601? DateTimeFormatter/ISO_DATE_TIME))
(def ^:private ^{:arglists '([s])} can-parse-time?     (partial can-parse-iso-8601? DateTimeFormatter/ISO_TIME))

(def LiteralDateString
  "Schema for an ISO-8601-formatted date string literal, i.e. a non-blank string accepted by `can-parse-date?`."
  ;; The constraint name is what shows up in validation errors, so it should say *date*, matching the `datetime` and
  ;; `time` wording used by the sibling schemas.
  (s/constrained su/NonBlankString can-parse-date? "valid ISO-8601 date string literal"))

(def LiteralDatetimeString
  "Schema for an ISO-8601-formatted datetime string literal."
  (s/constrained su/NonBlankString can-parse-datetime? "valid ISO-8601 datetime string literal"))

(def LiteralTimeString
  "Schema for an ISO-8601-formatted time string literal."
  (s/constrained su/NonBlankString can-parse-time? "valid ISO-8601 time string literal"))

(def TemporalLiteralString
  "Schema for either a literal datetime string, literal date string, or a literal time string."
  (s/named
   ;; The first predicate that returns truthy picks the schema. There is no `:else` branch, so a value that none of
   ;; the three formatters can parse (including any non-string) matches no branch and fails validation.
   (s/conditional
    can-parse-datetime? LiteralDatetimeString
    can-parse-date?     LiteralDateString
    can-parse-time?     LiteralTimeString)
   "valid ISO-8601 datetime, date, or time string literal"))

;; TODO - `unit` is not allowed if `n` is `current`
;;
;; `n` is either the keyword `:current` or an integer; `unit` is optional.
(defclause relative-datetime
  n    (s/cond-pre (s/eq :current) s/Int)
  unit (optional RelativeDatetimeUnit))

;; An integer amount `n` of some `unit`; unlike `relative-datetime`, both arguments are required. Accepted wherever
;; `NumericExpressionArgOrInterval` is, i.e. as the second or later argument of `:+` and `:-`.
(defclause interval
  n    s/Int
  unit RelativeDatetimeUnit)

;; This clause is automatically generated by middleware when datetime literals (literal strings or one of the Java
;; types) are encountered. Unit is inferred by looking at the Field the timestamp is compared against. Implemented
;; mostly as a convenience for driver implementations. You don't need to use this form directly when writing MBQL;
;; datetime literal strings are preferred instead.
;;
;; example:
;; [:= [:datetime-field [:field-id 10] :day] "2018-10-02"]
;;
;; becomes:
;; [:= [:datetime-field [:field-id 10] :day] [:absolute-datetime #inst "2018-10-02" :day]]
;;
;; NOTE(review): the `#inst` in the example is illustrative only; the schema below admits nothing but the four listed
;; `java.time` classes as the timestamp.
(defclause ^:internal absolute-datetime
  timestamp (s/cond-pre java.time.LocalDate
                        java.time.LocalDateTime
                        java.time.OffsetDateTime
                        java.time.ZonedDateTime)
  unit      DatetimeFieldUnit)

;; it could make sense to say hour-of-day(field) =  hour-of-day("2018-10-10T12:00")
;; but it does not make sense to say month-of-year(field) = month-of-year("08:00:00"),
;; does it? So we'll restrict the set of units a TimeValue can have to ones that have no notion of day/date.
(def TimeUnit
  "Schema for the bucketing units that are valid for a time-of-day value."
  (s/enum :default :minute :minute-of-hour :hour :hour-of-day))

;; almost exactly the same as `absolute-datetime`, but generated in some situations where the literal in question was
;; clearly a time (e.g. "08:00:00.000") and/or the Field derived from `:type/Time` and/or the unit was a
;; time-bucketing unit
;;
;; TODO - should we have a separate `date` type as well
(defclause ^:internal time
  ;; only the date-less `java.time` classes are accepted here
  time (s/cond-pre java.time.LocalTime java.time.OffsetTime)
  unit TimeUnit)

(def ^:private DatetimeLiteral
  "Schema for valid absolute datetime literals."
  (s/conditional
   ;; already-wrapped literals: validate them as the corresponding clause
   (partial is-clause? :absolute-datetime)
   absolute-datetime

   (partial is-clause? :time)
   time

   ;; anything else must be a raw literal: a parseable ISO-8601 string or one of the `java.time` classes below
   :else
   (s/cond-pre
    ;; literal datetime strings and Java types will get transformed to `absolute-datetime` clauses automatically by
    ;; middleware so drivers don't need to deal with these directly. You only need to worry about handling
    ;; `absolute-datetime` clauses.
    TemporalLiteralString

    java.time.LocalTime
    java.time.LocalDate
    java.time.LocalDateTime
    java.time.OffsetTime
    java.time.OffsetDateTime
    java.time.ZonedDateTime)))

(def DateTimeValue
  "Schema for a datetime value drivers will personally have to handle: an `absolute-datetime` form, a
  `relative-datetime` form, or a `time` form."
  (one-of absolute-datetime relative-datetime time))


;;; -------------------------------------------------- Other Values --------------------------------------------------

(def ValueTypeInfo
  "Type info about a value in a `:value` clause. Added automatically by `wrap-value-literals` middleware to values in
  filter clauses based on the Field in the clause."
  ;; Every key is optional, and every value may also be `nil`. No other keys are permitted.
  {(s/optional-key :database_type) (s/maybe su/NonBlankString)
   (s/optional-key :base_type)     (s/maybe su/FieldType)
   (s/optional-key :special_type)  (s/maybe su/FieldType)
   (s/optional-key :unit)          (s/maybe DatetimeFieldUnit)
   (s/optional-key :name)          (s/maybe su/NonBlankString)})

;; Arguments to filter clauses are automatically replaced with [:value <value> <type-info>] clauses by the
;; `wrap-value-literals` middleware. This is done to make it easier to implement query processors, because most driver
;; implementations dispatch off of Object type, which is often not enough to make informed decisions about how to
;; treat certain objects. For example, a string compared against a Postgres UUID Field needs to be parsed into a UUID
;; object, since text <-> UUID comparison doesn't work in Postgres. For this reason, raw literals in `:filter`
;; clauses are wrapped in `:value` clauses and given information about the type of the Field they will be compared to.
(defclause ^:internal value
  ;; the wrapped literal itself; deliberately unconstrained
  value    s/Any
  ;; may be `nil`
  type-info (s/maybe ValueTypeInfo))


;;; ----------------------------------------------------- Fields -----------------------------------------------------

;; Normal lowest-level Field clauses refer to a Field either by ID or by name

;; [:field-id <id>] -- reference by (positive integer) ID
(defclause field-id, id su/IntGreaterThanZero)

;; [:field-literal <field-name> <field-type>] -- reference by name, together with its type
(defclause field-literal, field-name su/NonBlankString, field-type su/FieldType)

;; [:joined-field <join-alias> <field>] -- one of the two clauses above, qualified with the alias of a join
(defclause joined-field, alias su/NonBlankString, field (one-of field-id field-literal))

;; Both args in `[:fk-> <source-field> <dest-field>]` are implicit `:field-ids`. E.g.
;;
;;   [:fk-> 10 20] --[NORMALIZE]--> [:fk-> [:field-id 10] [:field-id 20]]
;;
;; `fk->` clauses are automatically replaced by the Query Processor with appropriate `:joined-field` clauses during
;; preprocessing. Drivers do not need to handle `:fk->` clauses themselves.
;;
;; Note that the normalized form validated here also accepts `:field-literal` clauses for either argument.
(defclause ^{:requires-features #{:foreign-keys}} ^:sugar fk->
  source-field (one-of field-id field-literal)
  dest-field   (one-of field-id field-literal))

;; Expression *references* refer to something defined in the `:expressions` clause (e.g. a definition like
;; `[:+ [:field-id 1] [:field-id 2]]`) by its name.
(defclause ^{:requires-features #{:expressions}} expression
  expression-name su/NonBlankString)

;; `datetime-field` is used to specify DATE BUCKETING for a Field that represents a moment in time of some sort. There
;; is no requirement that all `:type/DateTime` derived Fields be wrapped in `datetime-field`, but for legacy reasons
;; `:field-id` clauses that refer to datetime Fields will be automatically "bucketed" in the `:breakout` and `:filter`
;; clauses, but nowhere else. Auto-bucketing only applies to `:filter` clauses when values for comparison are
;; `yyyy-MM-dd` date strings. See `auto-bucket-datetimes` for more details. `:field-id` clauses elsewhere will not be
;; automatically bucketed, so drivers still need to make sure they do any special datetime handling for plain
;; `:field-id` clauses when their Field derives from `:type/DateTime`.
;;
;; Datetime Field can wrap any of the lowest-level Field clauses, but not other datetime-field clauses, because that
;; wouldn't make sense. They similarly can not wrap expression references, because doing arithmetic on timestamps
;; doesn't make a whole lot of sense (what does `"2018-10-23"::timestamp / 2` mean?).
;;
;; Field is an implicit Field ID
(defclause datetime-field
  ;; any of the lowest-level Field clauses (directly, via `fk->`, or via a join) -- deliberately neither another
  ;; `datetime-field` nor an `expression` reference
  field (one-of field-id field-literal fk-> joined-field)
  ;; the bucketing unit to apply to `field`
  unit  DatetimeFieldUnit)

;; binning strategy can wrap any of the above clauses, but again, not another binning strategy clause
(def BinningStrategyName
  "Schema for a valid value for the `strategy-name` param of a `binning-strategy` clause."
  ;; Per the TODO on `binning-strategy`: `:default` is meant to take no strategy param, while the other two require
  ;; one (that rule is not yet enforced by the schema).
  (s/enum :num-bins :bin-width :default))

(def BinnableField
  "Schema for any sort of field clause that can be wrapped by a `binning-strategy` clause."
  ;; `binning-strategy` itself and `expression` references are intentionally absent from this list.
  (one-of field-id field-literal joined-field fk-> datetime-field))

(def ResolvedBinningStrategyOptions
  "Schema for map of options tacked on to the end of `binning-strategy` clauses by the `binning` middleware."
  ;; All four keys are required.
  {:num-bins   su/IntGreaterThanZero
   ;; any non-negative number (zero is allowed)
   :bin-width  (s/constrained s/Num (complement neg?) "bin width must be >= 0.")
   :min-value  s/Num
   :max-value  s/Num})

;; TODO - binning strategy param is disallowed for `:default` and required for the others. For `num-bins` it must also
;; be an integer.
(defclause ^{:requires-features #{:binning}} binning-strategy
  field            BinnableField
  strategy-name    BinningStrategyName
  ;; any non-negative number (zero is allowed); see the TODO above for the rules that are not enforced yet
  strategy-param   (optional (s/constrained s/Num (complement neg?) "strategy param must be >= 0."))
  ;; These are added in automatically by the `binning` middleware. Don't add them yourself, as they'll just be
  ;; replaced. Driver implementations can rely on this being populated
  resolved-options (optional ResolvedBinningStrategyOptions))

;; The actual union of every Field-like clause. Use `Field` (below) rather than this directly.
(def ^:private Field*
  (one-of field-id field-literal joined-field fk-> datetime-field expression binning-strategy))

(def Field
  "Schema for anything that refers to a Field, from the common `[:field-id <id>]` to variants like `:datetime-field` or
  `:fk->` or an expression reference `[:expression <name>]`."
  ;; `s/recursive` takes the var, so `Field*` is looked up when a value is validated rather than captured here.
  (s/recursive #'Field*))

;; aggregate field reference refers to an aggregation, e.g.
;;
;;    {:aggregation [[:count]]
;;     :order-by    [[:asc [:aggregation 0]]]} ;; refers to the 0th aggregation, `:count`
;;
;; Currently aggregate Field references can only be used inside order-by clauses. In the future once we support SQL
;; `HAVING` we can allow them in filter clauses too
;;
;; TODO - shouldn't we allow composing aggregations in expressions? e.g.
;;
;;    {:order-by [[:asc [:+ [:aggregation 0] [:aggregation 1]]]]}
;;
;; TODO - it would be nice if we could check that there's actually an aggregation with the corresponding index,
;; wouldn't it
;; NOTE(review): `s/Int` also admits negative numbers, which can never be a valid index -- consider tightening.
(defclause aggregation, aggregation-clause-index s/Int)

(def FieldOrAggregationReference
  "Schema for an indexed reference to an aggregation clause (`[:aggregation <index>]`), or else any valid Field
  clause."
  (s/conditional
   (partial is-clause? :aggregation) aggregation
   :else                             Field))


;;; -------------------------------------------------- Expressions ---------------------------------------------------

;; Expressions are "calculated column" definitions, defined once and then used elsewhere in the MBQL query.

(def string-expressions
  "Set of clause names that are treated as string functions."
  #{:substring
    :trim :rtrim :ltrim
    :upper :lower
    :replace
    :concat
    :regex-match-first
    ;; also listed in the arithmetic expressions set
    :coalesce})

;; `StringExpression` is defined further down, after the clauses that use `StringExpressionArg`.
(declare StringExpression)

;; Anything accepted where a string-valued argument is expected. Branches are tried top to bottom.
(def ^:private StringExpressionArg
  (s/conditional
   ;; a literal string
   string?
   s/Str

   ;; a nested string function call
   (partial is-clause? string-expressions)
   (s/recursive #'StringExpression)

   ;; a literal already wrapped in a `:value` clause
   (partial is-clause? :value)
   value

   ;; otherwise it has to be some kind of Field reference
   :else
   Field))

;; Clause names that are validated as `ArithmeticExpression`s.
(def ^:private arithmetic-expressions
  #{:+ :- :/ :*
    :coalesce :length
    :round :ceil :floor :abs
    :power :sqrt :log :exp})

;; Clause names that are validated as `Aggregation`s.
(def ^:private aggregations
  #{:sum :avg :stddev :var :median :percentile :min :max
    :cum-count :cum-sum
    :count-where :sum-where :share
    :distinct :metric :aggregation-options :count})

;; Both are defined further down; they are only referenced through their vars below.
(declare ArithmeticExpression)
(declare Aggregation)

;; Anything accepted where a numeric argument is expected. Branches are tried top to bottom.
(def ^:private NumericExpressionArg
  (s/conditional
   ;; a literal number
   number?
   s/Num

   ;; a nested arithmetic expression
   (partial is-clause? arithmetic-expressions)
   (s/recursive #'ArithmeticExpression)

   ;; an aggregation, e.g. for expressions like `[:+ [:sum ...] [:sum ...]]`
   (partial is-clause? aggregations)
   (s/recursive #'Aggregation)

   ;; a literal already wrapped in a `:value` clause
   (partial is-clause? :value)
   value

   ;; otherwise it has to be some kind of Field reference
   :else
   Field))

;; Argument that may be either numeric or string-valued. Branches are tried top to bottom, so a clause whose name is
;; in both expression sets (`:coalesce`) is validated as an arithmetic expression. Unlike `NumericExpressionArg`,
;; there is no branch for aggregations here.
(def ^:private ExpressionArg
  (s/conditional
   number?
   s/Num

   (partial is-clause? arithmetic-expressions)
   (s/recursive #'ArithmeticExpression)

   string?
   s/Str

   (partial is-clause? string-expressions)
   (s/recursive #'StringExpression)

   (partial is-clause? :value)
   value

   :else
   Field))

;; Either an `:interval` clause or any ordinary numeric expression argument.
(def ^:private NumericExpressionArgOrInterval
  (s/conditional
   (partial is-clause? :interval) interval
   :else                          NumericExpressionArg))

;; Takes two or more arguments, each of which may be numeric or string-valued.
(defclause ^{:requires-features #{:expressions}} coalesce
  a ExpressionArg, b ExpressionArg, more (rest ExpressionArg))

;; `length` (the third argument) is optional.
(defclause ^{:requires-features #{:expressions}} substring
  s StringExpressionArg, start NumericExpressionArg, length (optional NumericExpressionArg))

;; Takes a string argument, but its name is listed in `arithmetic-expressions`, not `string-expressions`.
(defclause ^{:requires-features #{:expressions}} length
  s StringExpressionArg)

;; The following are all unary string functions: each takes a single string-valued argument.

(defclause ^{:requires-features #{:expressions}} trim
  s StringExpressionArg)

(defclause ^{:requires-features #{:expressions}} rtrim
  s StringExpressionArg)

(defclause ^{:requires-features #{:expressions}} ltrim
  s StringExpressionArg)

(defclause ^{:requires-features #{:expressions}} upper
  s StringExpressionArg)

(defclause ^{:requires-features #{:expressions}} lower
  s StringExpressionArg)

;; `match` and `replacement` must be literal strings; only `s` may be an arbitrary string expression.
(defclause ^{:requires-features #{:expressions}} replace
  s StringExpressionArg, match s/Str, replacement s/Str)

;; Takes two or more string-valued arguments.
(defclause ^{:requires-features #{:expressions}} concat
  a StringExpressionArg, b StringExpressionArg, more (rest StringExpressionArg))

;; `pattern` must be a literal string. This is the only string clause that additionally requires the `:regex` feature.
(defclause ^{:requires-features #{:expressions :regex}} regex-match-first
  s StringExpressionArg, pattern s/Str)

;; Union of every string function clause. The clause names here mirror the `string-expressions` set.
(def ^:private StringExpression*
  (one-of substring trim ltrim rtrim replace lower upper concat regex-match-first coalesce))

(def ^:private StringExpression
  "Schema for the definition of a string expression."
  (s/recursive #'StringExpression*))

;; The four basic arithmetic operators all take two or more arguments. For `+` and `-` every argument after the first
;; may also be an `:interval` clause; `/` and `*` accept plain numeric arguments only.

(defclause ^{:requires-features #{:expressions}} +
  x NumericExpressionArg, y NumericExpressionArgOrInterval, more (rest NumericExpressionArgOrInterval))

(defclause ^{:requires-features #{:expressions}} -
  x NumericExpressionArg, y NumericExpressionArgOrInterval, more (rest NumericExpressionArgOrInterval))

(defclause ^{:requires-features #{:expressions}} /, x NumericExpressionArg, y NumericExpressionArg, more (rest NumericExpressionArg))

(defclause ^{:requires-features #{:expressions}} *, x NumericExpressionArg, y NumericExpressionArg, more (rest NumericExpressionArg))

;; Unary numeric functions that only require the basic `:expressions` feature.

(defclause ^{:requires-features #{:expressions}} floor
  x NumericExpressionArg)

(defclause ^{:requires-features #{:expressions}} ceil
  x NumericExpressionArg)

(defclause ^{:requires-features #{:expressions}} round
  x NumericExpressionArg)

(defclause ^{:requires-features #{:expressions}} abs
  x NumericExpressionArg)

;; Numeric functions that require the `:advanced-math-expressions` feature rather than `:expressions`. `power` takes
;; two arguments; the rest take one.

(defclause ^{:requires-features #{:advanced-math-expressions}} power
  x NumericExpressionArg,  y NumericExpressionArg)

(defclause ^{:requires-features #{:advanced-math-expressions}} sqrt
  x NumericExpressionArg)

(defclause ^{:requires-features #{:advanced-math-expressions}} exp
  x NumericExpressionArg)

(defclause ^{:requires-features #{:advanced-math-expressions}} log
  x NumericExpressionArg)

;; Union of every arithmetic clause. The clause names here mirror the `arithmetic-expressions` set.
(def ^:private ArithmeticExpression*
  (one-of + - / * coalesce length floor ceil round abs power sqrt exp log))

(def ^:private ArithmeticExpression
  "Schema for the definition of an arithmetic expression."
  (s/recursive #'ArithmeticExpression*))


;;; ----------------------------------------------------- Filter -----------------------------------------------------

;; `Filter` is defined after all of the individual filter clauses; the compound clauses below refer to it through its
;; var so that they can nest arbitrarily.
(declare Filter)

;; `and` and `or` each require at least two subclauses.

(defclause and
  first-clause  (s/recursive #'Filter)
  second-clause (s/recursive #'Filter)
  other-clauses (rest (s/recursive #'Filter)))

(defclause or
  first-clause  (s/recursive #'Filter)
  second-clause (s/recursive #'Filter)
  other-clauses (rest (s/recursive #'Filter)))

;; `not` wraps exactly one subclause.
(defclause not, clause (s/recursive #'Filter))

;; Either a `:relative-datetime` clause or any Field clause.
(def ^:private FieldOrRelativeDatetime
  (s/conditional
   (partial is-clause? :relative-datetime) relative-datetime
   :else                                   Field))

(def ^:private EqualityComparible
  "Schema for things that make sense in a `=` or `!=` filter, i.e. things that can be compared for equality."
  ;; `s/maybe` because `nil` is itself a legitimate operand (e.g. in `[:= <field> nil]`). Unlike `OrderComparible`,
  ;; booleans are accepted here as well.
  (s/maybe
   (s/cond-pre
    s/Bool
    s/Num
    s/Str
    DatetimeLiteral
    FieldOrRelativeDatetime
    ExpressionArg
    value)))

(def ^:private OrderComparible
  "Schema for anything sortable, i.e. the things that make sense as operands of a filter like `>` or `<`."
  (s/conditional
   ;; literals that are already wrapped in a `:value` clause
   (partial is-clause? :value)
   value

   ;; everything else: raw literals, expressions, and Field / relative-datetime references
   :else
   (s/cond-pre
    s/Num
    s/Str
    DatetimeLiteral
    ExpressionArg
    FieldOrRelativeDatetime)))

;; For all of the non-compound Filter clauses below the first arg is an implicit Field ID

;; These are SORT OF SUGARY, because extra values will automatically be converted to compound clauses. Driver
;; implementations only need to handle the 2-arg forms.
;;
;; `=` works like SQL `IN` with more than 2 args
;; [:= [:field-id 1] 2 3] --[DESUGAR]--> [:or [:= [:field-id 1] 2] [:= [:field-id 1] 3]]
;;
;; `!=` works like SQL `NOT IN` with more than 2 args
;; [:!= [:field-id 1] 2 3] --[DESUGAR]--> [:and [:!= [:field-id 1] 2] [:!= [:field-id 1] 3]]

(defclause =,  field EqualityComparible, value-or-field EqualityComparible, more-values-or-fields (rest EqualityComparible))
(defclause !=, field EqualityComparible, value-or-field EqualityComparible, more-values-or-fields (rest EqualityComparible))

;; Ordering comparisons take exactly two operands (no extra values, unlike `=` and `!=`).
(defclause <,  field OrderComparible, value-or-field OrderComparible)
(defclause >,  field OrderComparible, value-or-field OrderComparible)
(defclause <=, field OrderComparible, value-or-field OrderComparible)
(defclause >=, field OrderComparible, value-or-field OrderComparible)

;; [:between <field> <min> <max>]
(defclause between field OrderComparible, min OrderComparible, max OrderComparible)

;; SUGAR CLAUSE: This is automatically rewritten as a pair of `:between` clauses by the `:desugar` middleware.
;;
;; Mind the argument order: after the two fields come lat-max, lon-min, lat-min, lon-max.
(defclause ^:sugar inside
  lat-field OrderComparible
  lon-field OrderComparible
  lat-max   OrderComparible
  lon-min   OrderComparible
  lat-min   OrderComparible
  lon-max   OrderComparible)

;; SUGAR CLAUSES: These are rewritten as `[:= <field> nil]` and `[:!= <field> nil]` respectively
(defclause ^:sugar is-null,  field Field)
(defclause ^:sugar not-null, field Field)

;; Options map accepted as the optional last argument of the string filters below.
(def ^:private StringFilterOptions
  {(s/optional-key :case-sensitive) s/Bool}) ; default true

;; All string filters share one shape: two string-valued arguments followed by an optional options map.
(defclause starts-with, field StringExpressionArg, string-or-field StringExpressionArg, options (optional StringFilterOptions))
(defclause ends-with,   field StringExpressionArg, string-or-field StringExpressionArg, options (optional StringFilterOptions))
(defclause contains,    field StringExpressionArg, string-or-field StringExpressionArg, options (optional StringFilterOptions))

;; SUGAR: this is rewritten as [:not [:contains ...]]
(defclause ^:sugar does-not-contain
  field StringExpressionArg, string-or-field StringExpressionArg, options (optional StringFilterOptions))

;; Options map accepted as the optional last argument of `time-interval`.
(def ^:private TimeIntervalOptions
  ;; Should we include partial results for the current day/month/etc? Defaults to `false`; set this to `true` to
  ;; include them.
  {(s/optional-key :include-current) s/Bool}) ; default false

;; Filter subclause. Syntactic sugar for specifying a specific time interval.
;;
;; Return rows where datetime Field 100's value is in the current month
;;
;;    [:time-interval [:field-id 100] :current :month]
;;
;; Return rows where datetime Field 100's value is in the current month, including partial results for the
;; current day
;;
;;    [:time-interval [:field-id 100] :current :month {:include-current true}]
;;
;; SUGAR: This is automatically rewritten as a filter clause with a relative-datetime value
(defclause ^:sugar time-interval
  ;; same set of clauses a `datetime-field` can wrap
  field   (one-of field-id fk-> field-literal joined-field)
  ;; either an integer or one of the keywords `:current`, `:last`, `:next`
  n       (s/cond-pre
           s/Int
           (s/enum :current :last :next))
  unit    RelativeDatetimeUnit
  options (optional TimeIntervalOptions))

;; A segment is a special `macro` that saves some pre-defined filter clause, e.g. [:segment 1]
;; this gets replaced by a normal Filter clause in MBQL macroexpansion
;;
;; It can also be used for GA, which looks something like `[:segment "gaid::-11"]`. GA segments aren't actually MBQL
;; segments and pass-thru to GA.
;;
;; Hence the ID may be either a positive integer or a non-blank string.
(defclause ^:sugar segment, segment-id (s/cond-pre su/IntGreaterThanZero su/NonBlankString))

(def ^:private Filter*
  (s/conditional
   ;; arithmetic and string expressions are themselves accepted in filter position
   (partial is-clause? arithmetic-expressions) ArithmeticExpression
   (partial is-clause? string-expressions)     StringExpression
   ;; everything else has to be one of the actual filter clauses
   :else
   (one-of
    ;; filters drivers must implement
    and or not = != < > <= >= between starts-with ends-with contains
    ;; SUGAR filters drivers do not need to implement
    does-not-contain inside is-null not-null time-interval segment)))

(def Filter
  "Schema for a valid MBQL `:filter` clause."
  (s/recursive #'Filter*))

;; A single `[<pred> <expr>]` pair: a Filter plus the expression associated with it.
(def ^:private CaseClause [(s/one Filter "pred") (s/one ExpressionArg "expr")])

;; A sequence of such pairs.
(def ^:private CaseClauses [CaseClause])

;; Optional trailing options map; the only recognized key is `:default`.
(def ^:private CaseOptions
  {(s/optional-key :default) ExpressionArg})

;; [:case [[<pred> <expr>] ...] <options>?]
(defclause ^{:requires-features #{:basic-aggregations}} case
  clauses CaseClauses, options (optional CaseOptions))

(def FieldOrExpressionDef
  "Schema for anything that is accepted as a top-level expression definition: an arithmetic expression such as a
  `:+` clause, a string expression such as a `:concat` clause, a `:case` clause, or a Field clause such as
  `:field-id`."
  (s/conditional
   (partial is-clause? arithmetic-expressions) ArithmeticExpression
   (partial is-clause? string-expressions)     StringExpression
   (partial is-clause? :case)                  case
   :else                                       Field))


;;; -------------------------------------------------- Aggregations --------------------------------------------------

;; For all of the 'normal' Aggregations below (excluding Metrics) fields are implicit Field IDs

;; cum-sum and cum-count are SUGAR because they're implemented in middleware. These clauses are swapped out with
;; `sum` and `count` aggregations respectively and summation is done in Clojure-land
;;
;; NOTE(review): the `^:sugar` tags below don't match the comment above -- plain `count` is tagged, while `cum-sum`
;; (defined further down) is not. Confirm which clauses are really meant to carry the tag.
(defclause ^{:requires-features #{:basic-aggregations}} ^:sugar count,     field (optional Field))
(defclause ^{:requires-features #{:basic-aggregations}} ^:sugar cum-count, field (optional Field))

;; Technically, aggregations besides count can also accept expressions as args, e.g.
;;
;;    [[:sum [:+ [:field-id 1] [:field-id 2]]]]
;;
;; Which is equivalent to SQL:
;;
;;    SUM(field_1 + field_2)
;;
;; That is why the argument of each clause below is a `FieldOrExpressionDef` rather than a plain `Field`.

(defclause ^{:requires-features #{:basic-aggregations}} avg,      field-or-expression FieldOrExpressionDef)
(defclause ^{:requires-features #{:basic-aggregations}} cum-sum,  field-or-expression FieldOrExpressionDef)
(defclause ^{:requires-features #{:basic-aggregations}} distinct, field-or-expression FieldOrExpressionDef)
(defclause ^{:requires-features #{:basic-aggregations}} sum,      field-or-expression FieldOrExpressionDef)
(defclause ^{:requires-features #{:basic-aggregations}} min,      field-or-expression FieldOrExpressionDef)
(defclause ^{:requires-features #{:basic-aggregations}} max,      field-or-expression FieldOrExpressionDef)

;; Conditional aggregations: each takes a Filter as its predicate; `sum-where` additionally takes the thing to sum.

(defclause ^{:requires-features #{:basic-aggregations}} sum-where
  field-or-expression FieldOrExpressionDef, pred Filter)

(defclause ^{:requires-features #{:basic-aggregations}} count-where
  pred Filter)

(defclause ^{:requires-features #{:basic-aggregations}} share
  pred Filter)

;; Statistical aggregations; note that these require driver features other than `:basic-aggregations`.

(defclause ^{:requires-features #{:standard-deviation-aggregations}} stddev
  field-or-expression FieldOrExpressionDef)

;; The schema for this clause is referred to as `ag:var` elsewhere in this namespace, while the clause name itself is
;; `var`. Presumably the `[ag:var var]` form exists to avoid clashing with the `var` special form -- confirm against
;; `defclause`.
(defclause ^{:requires-features #{:standard-deviation-aggregations}} [ag:var var]
  field-or-expression FieldOrExpressionDef)

(defclause ^{:requires-features #{:percentile-aggregations}} median
  field-or-expression FieldOrExpressionDef)

;; The second argument (which percentile to compute) may be any numeric expression argument.
(defclause ^{:requires-features #{:percentile-aggregations}} percentile
  field-or-expression FieldOrExpressionDef, percentile NumericExpressionArg)


;; Metrics are just 'macros' (placeholders for other aggregations with optional filter and breakout clauses) that get
;; expanded to other aggregations/etc. in the expand-macros middleware
;;
;; METRICS WITH STRING IDS, e.g. `[:metric "ga:sessions"]`, are Google Analytics metrics, not Metabase metrics! They
;; pass straight thru to the GA query processor.
;;
;; Hence the ID may be either a positive integer or a non-blank string.
(defclause ^:sugar metric, metric-id (s/cond-pre su/IntGreaterThanZero su/NonBlankString))

;; the following are definitions for expression aggregations, e.g. [:+ [:sum [:field-id 10]] [:sum [:field-id 20]]]

;; Any aggregation that is not wrapped in `:aggregation-options`: either an arithmetic expression (whose arguments may
;; in turn be aggregations) or one of the plain aggregation clauses.
(def ^:private UnnamedAggregation*
  (s/conditional
   (partial is-clause? arithmetic-expressions)
   ArithmeticExpression

   :else
   (one-of count avg cum-count cum-sum distinct stddev sum min max metric share count-where
           sum-where case median percentile ag:var)))

(def ^:private UnnamedAggregation
  (s/recursive (var UnnamedAggregation*)))

(def AggregationOptions
  "Additional options for any aggregation clause when wrapping it in `:aggregation-options`."
  ;; Both keys are optional, but when present the value must be a non-blank string. No other keys are permitted.
  {;; name to use for this aggregation in the native query instead of the default name (e.g. `count`)
   (s/optional-key :name)         su/NonBlankString
   ;; user-facing display name for this aggregation instead of the default one
   (s/optional-key :display-name) su/NonBlankString})

;; [:aggregation-options <aggregation> <options>] -- wraps an aggregation together with its options map. The wrapped
;; aggregation must be an unnamed one, so `:aggregation-options` clauses cannot be nested directly.
(defclause aggregation-options
  aggregation UnnamedAggregation
  options     AggregationOptions)

(def Aggregation
  "Schema for any valid `:aggregation` clause: either an `:aggregation-options` wrapper or a bare (unnamed)
  aggregation."
  (s/conditional
   (partial is-clause? :aggregation-options) aggregation-options
   :else                                     UnnamedAggregation))


;;; ---------------------------------------------------- Order-By ----------------------------------------------------

;; order-by is just a series of `[<direction> <field>]` clauses like
;;
;;    {:order-by [[:asc [:field-id 1]], [:desc [:field-id 2]]]}
;;
;; Field ID is implicit in these clauses

;; Either direction can order by a Field clause or by an `[:aggregation <index>]` reference.
(defclause asc,  field FieldOrAggregationReference)
(defclause desc, field FieldOrAggregationReference)

(def OrderBy
  "Schema for an `order-by` clause subclause."
  (one-of asc desc))


;;; +----------------------------------------------------------------------------------------------------------------+
;;; |                                                    Queries                                                     |
;;; +----------------------------------------------------------------------------------------------------------------+

;;; ---------------------------------------------- Native [Inner] Query ----------------------------------------------

;; TODO - schemas for template tags and dimensions live in `metabase.query-processor.middleware.parameters.sql`. Move
;; them here when we get the chance.

;; Placeholder: template tags are not validated in this namespace yet.
(def ^:private TemplateTag
  s/Any) ; s/Any for now until we move over the stuff from the parameters middleware

(def NativeQuery
  "Schema for a valid, normalized native [inner] query."
  ;; `:query` is the only required key, and its value is not constrained here
  {:query                          s/Any
   ;; map of template tag name -> tag definition
   (s/optional-key :template-tags) {su/NonBlankString TemplateTag}
   ;; collection (table) this query should run against. Needed for MongoDB
   (s/optional-key :collection)    (s/maybe su/NonBlankString)
   ;; other stuff gets added in by different bits of QP middleware to record bits of state or pass info around.
   ;; Everyone else can ignore them.
   s/Keyword                       s/Any})


;;; ----------------------------------------------- MBQL [Inner] Query -----------------------------------------------

;; Both are defined later in this namespace; `SourceQuery` needs `MBQLQuery` because source queries nest.
(declare Query MBQLQuery)

(def SourceQuery
  "Schema for a valid value for a `:source-query` clause."
  ;; a map with a truthy `:native` key is treated as a native source query; anything else as an MBQL one
  (s/if (every-pred map? :native)
    ;; when using native queries as source queries the schema is exactly the same except use `:native` in place of
    ;; `:query` for reasons I do not fully remember (perhaps to make it easier to differentiate them from MBQL source
    ;; queries).
    (set/rename-keys NativeQuery {:query :native})
    (s/recursive #'MBQLQuery)))

(def SourceQueryMetadata
  "Schema for the expected keys for a single column in `:source-metadata` (`:source-metadata` is a sequence of these
  entries), if it is passed in to the query.

  This metadata automatically gets added for all source queries that are referenced via the `card__id` `:source-table`
  form; for explicit `:source-query`s you should usually include this information yourself."
  ;; TODO - there is a very similar schema in `metabase.sync.analyze.query-results`; see if we can merge them
  {:name                          su/NonBlankString
   :base_type                     su/FieldType
   ;; this is only used by the annotate post-processing stage, not really needed at all for pre-processing, might be
   ;; able to remove this as a requirement
   :display_name                  su/NonBlankString
   (s/optional-key :special_type) (s/maybe su/FieldType)
   ;; you'll need to provide this in order to use BINNING
   (s/optional-key :fingerprint)  (s/maybe su/Map)
   ;; any other keys (of any type, not just keywords) are allowed and ignored
   s/Any                          s/Any})

(def ^java.util.regex.Pattern source-table-card-id-regex
  "Pattern that matches `card__id` strings that can be used as the `:source-table` of MBQL queries."
  ;; `card__` followed by a positive integer with no leading zero
  #"^card__[1-9]\d*$")

(def SourceTable
  "Schema for a valid value for the `:source-table` clause of an MBQL query."
  ;; either a positive integer ID or a `card__id` string; the regex itself is used as the schema for the string form
  (s/cond-pre su/IntGreaterThanZero source-table-card-id-regex))

(def JoinField
  "Schema for any valid `Field` that is, or wraps, a `:joined-field` clause."
  (s/constrained
   Field
   ;; truthy when `match/match` finds at least one `:joined-field` form in the clause
   (fn [field-clause]
     (seq (match/match field-clause [:joined-field true])))
   "`:joined-field` clause or Field clause wrapping a `:joined-field` clause"))

(def JoinFields
  "Schema for valid values of a join `:fields` clause."
  (s/named
   (su/distinct (su/non-empty [JoinField]))
   "Distinct, non-empty sequence of `:joined-field` clauses or Field clauses wrapping `:joined-field` clauses"))

(def JoinStrategy
  "Strategy that should be used to perform the equivalent of a SQL `JOIN` against another table or a nested query.
  One of `:left-join`, `:right-join`, `:inner-join`, or `:full-join`.

  These correspond 1:1 to features of the same name in driver features lists; e.g. you should check that the current
  driver supports `:full-join` before generating a Join clause using that strategy."
  (s/enum :left-join :right-join :inner-join :full-join))

(def Join
  "Perform the equivalent of a SQL `JOIN` with another Table or nested `:source-query`. JOINs are either explicitly
  specified in the incoming query, or implicitly generated when one uses a `:fk->` clause.

  In the top-level query, you can reference Fields from the joined table or nested query by the `:fk->` clause for
  implicit joins; for explicit joins, you *must* specify `:alias` yourself; you can then reference Fields by using a
  `:joined-field` clause, e.g.

    [:joined-field \"my_join_alias\" [:field-id 1]]                                ; for joins against other Tables
    [:joined-field \"my_join_alias\" [:field-literal \"my_field\" :field/Integer]] ; for joins against nested queries

  A Join must have exactly one of `:source-table` or `:source-query`; this is enforced by the constraint at the end of
  the schema."
  (->
   {;; *What* to JOIN. Self-joins can be done by using the same `:source-table` as in the query where this is specified.
    ;; YOU MUST SUPPLY EITHER `:source-table` OR `:source-query`, BUT NOT BOTH!
    (s/optional-key :source-table)
    SourceTable

    (s/optional-key :source-query)
    SourceQuery
    ;;
    ;; The condition on which to JOIN. Can be anything that is a valid `:filter` clause. For automatically-generated
    ;; JOINs this is always
    ;;
    ;;    [:= <source-table-fk-field> [:joined-field <join-table-alias> <dest-table-pk-field>]]
    ;;
    :condition
    Filter
    ;;
    ;; Defaults to `:left-join`; used for all automatically-generated JOINs
    ;;
    ;; Driver implementations: this is guaranteed to be present after pre-processing.
    (s/optional-key :strategy)
    JoinStrategy
    ;;
    ;; The Fields to include in the results *if* a top-level `:fields` clause *is not* specified. This can be either
    ;; `:none`, `:all`, or a sequence of Field clauses.
    ;;
    ;; *  `:none`: no Fields from the joined table or nested query are included (unless indirectly included by
    ;;    breakouts or other clauses). This is the default, and what is used for automatically-generated joins.
    ;;
    ;; *  `:all`: will include all of the Fields from the joined table or query
    ;;
    ;; *  a sequence of Field clauses: include only the Fields specified. Valid clauses are the same as the top-level
    ;;    `:fields` clause. This should be non-empty and all elements should be distinct. The normalizer will
    ;;    automatically remove duplicate fields for you, and replace empty clauses with `:none`.
    ;;
    ;; Driver implementations: you can ignore this clause. Relevant fields will be added to top-level `:fields` clause
    ;; with appropriate aliases.
    (s/optional-key :fields)
    (s/named
     (s/cond-pre
      (s/enum :all :none)
      JoinFields)
     (str
      "Valid Join `:fields`: `:all`, `:none`, or a sequence of `:joined-field` clauses,"
      " or clauses wrapping `:joined-field`."))
    ;;
    ;; The name used to alias the joined table or query. This is usually generated automatically and generally looks
    ;; like `table__via__field`. You can specify this yourself if you need to reference a joined field in a
    ;; `:joined-field` clause.
    ;;
    ;; Driver implementations: This is guaranteed to be present after pre-processing.
    (s/optional-key :alias)
    su/NonBlankString
    ;;
    ;; Used internally, only for annotation purposes in post-processing. When a join is implicitly generated via an
    ;; `:fk->` clause, the ID of the foreign key field in the source Table will be recorded here. This information is
    ;; used to add `fk_field_id` information to the `:cols` in the query results; I believe this is used to facilitate
    ;; drill-thru? :shrug:
    ;;
    ;; Don't set this information yourself. It will have no effect.
    (s/optional-key :fk-field-id)
    (s/maybe su/IntGreaterThanZero)
    ;;
    ;; Metadata about the source query being used, if pulled in from a Card via the `:source-table "card__id"` syntax.
    ;; added automatically by the `resolve-card-id-source-tables` middleware.
    (s/optional-key :source-metadata)
    (s/maybe [SourceQueryMetadata])

    ;; any other keyword keys are allowed and are not validated
    s/Keyword s/Any}
   ;; at least one of the two source keys must be truthy, and they must not both be truthy
   (s/constrained
    (every-pred
     (some-fn :source-table :source-query)
     (complement (every-pred :source-table :source-query)))
    "Joins must have either a `source-table` or `source-query`, but not both.")))

(def Joins
  "Schema for the value of a `:joins` clause: a non-empty sequence of `Join` maps. Joins without an `:alias` are
  ignored by the uniqueness check; every alias that *is* given must differ from all the others."
  (s/constrained
   (su/non-empty [Join])
   (fn [joins]
     ;; `keep` drops the `nil` produced for joins that have no `:alias`
     (su/empty-or-distinct? (keep :alias joins)))
   "All join aliases must be unique."))

(def Fields
  "Schema for the top-level MBQL `:fields` clause: one or more `Field` clauses with no duplicates."
  (-> [Field]
      su/non-empty
      su/distinct
      (s/named "Distinct, non-empty sequence of Field clauses")))

(def MBQLQuery
  "Schema for a valid, normalized MBQL [inner] query: a map of the clauses listed below that contains exactly one of
  `:source-table` or `:source-query`, and whose `:fields` do not repeat anything already listed in `:breakout`."
  (let [clauses
        {(s/optional-key :source-query) SourceQuery
         (s/optional-key :source-table) SourceTable
         (s/optional-key :aggregation)  (su/non-empty [Aggregation])
         (s/optional-key :breakout)     (su/non-empty [Field])
         ;; TODO - expressions keys should be strings; fix this when we get a chance
         (s/optional-key :expressions)  {s/Keyword FieldOrExpressionDef}
         (s/optional-key :fields)       Fields
         (s/optional-key :filter)       Filter
         (s/optional-key :limit)        su/IntGreaterThanZero
         (s/optional-key :order-by)     (su/distinct (su/non-empty [OrderBy]))
         ;; `:page` is the 1-based page number and `:items` is the page size, so
         ;;
         ;;    {:page 1, :items 10} -> items 1-10
         ;;    {:page 2, :items 10} -> items 11-20
         (s/optional-key :page)         {:page  su/IntGreaterThanZero
                                         :items su/IntGreaterThanZero}
         (s/optional-key :joins)        Joins
         ;;
         ;; Info about the columns of the source query. Added in automatically by middleware. This metadata is
         ;; primarily used to power things like binning when used with Field Literals instead of normal Fields
         (s/optional-key :source-metadata) (s/maybe [SourceQueryMetadata])
         ;;
         ;; Various bits of middleware add additional keys, such as `fields-is-implicit?`, to record bits of state or
         ;; pass info to other pieces of middleware; the frontend client may add keys for its own purposes as well.
         ;; Everyone else can ignore them.
         s/Keyword                      s/Any}

        ;; true when exactly one of the two source keys is present in the map
        exactly-one-source?
        (fn [inner-query]
          (not= (contains? inner-query :source-query)
                (contains? inner-query :source-table)))

        ;; true when no element of `:fields` also appears in `:breakout`
        no-breakouts-in-fields?
        (fn [inner-query]
          (let [breakout-fields (set (:breakout inner-query))]
            (not-any? #(contains? breakout-fields %) (:fields inner-query))))]
    (s/constrained
     (s/constrained
      clauses
      exactly-one-source?
      "Query must specify either `:source-table` or `:source-query`, but not both.")
     no-breakouts-in-fields?
     "Fields specified in `:breakout` should not be specified in `:fields`; this is implied.")))


;;; ----------------------------------------------------- Params -----------------------------------------------------

(def ^:private Parameter
  "Schema for a valid, normalized query parameter. Currently a placeholder that accepts any value."
  s/Any) ; s/Any for now until we move over the stuff from the parameters middleware


;;; ---------------------------------------------------- Options -----------------------------------------------------

(def ^:private Settings
  "Options that tweak the behavior of the query processor."
  ;; The timezone the query should be run in, overriding the default report timezone for the instance.
  {(s/optional-key :report-timezone) su/NonBlankString
   ;; other Settings might be used somewhere, but I don't know about them. Add them if you come across them for
   ;; documentation purposes
   s/Keyword                         s/Any})

(def ^:private Constraints
  "Additional constraints added to a query limiting the maximum number of rows that can be returned. Mostly useful
  because native queries don't support the MBQL `:limit` clause. For MBQL queries, if `:limit` is set, it will
  override these values.

  When both `:max-results` and `:max-results-bare-rows` are given, `:max-results-bare-rows` must not exceed
  `:max-results`."
  (s/constrained
   { ;; maximum number of results to allow for a query with aggregations. If `max-results-bare-rows` is unset, this
    ;; applies to all queries
    (s/optional-key :max-results)           su/IntGreaterThanOrEqualToZero
    ;; maximum number of results to allow for a query with no aggregations.
    ;; If set, this must be no greater than `:max-results`
    (s/optional-key :max-results-bare-rows) su/IntGreaterThanOrEqualToZero
    ;; other Constraints might be used somewhere, but I don't know about them. Add them if you come across them for
    ;; documentation purposes
    s/Keyword                               s/Any}
   (fn [{:keys [max-results max-results-bare-rows]}]
     ;; the comparison only applies when both limits are supplied; otherwise there is nothing to check
     (core/or (core/not (core/and max-results max-results-bare-rows))
              (core/>= max-results max-results-bare-rows)))
   "max-results-bare-rows must be less than or equal to max-results"))

(def ^:private MiddlewareOptions
  "Additional options that can be used to toggle middleware on or off. All keys are optional."
  {;; should we skip adding results_metadata to query results after running the query? Used by
   ;; `metabase.query-processor.middleware.results-metadata`; default `false`
   (s/optional-key :skip-results-metadata?)
   s/Bool

   ;; should we skip converting datetime types to ISO-8601 strings with appropriate timezone when post-processing
   ;; results? Used by `metabase.query-processor.middleware.format-rows`; default `false`
   ;;
   ;; NOTE(review): the key name reads as the positive form ("format rows?") while the description above is phrased as
   ;; "skip ... default `false`" -- confirm the polarity and default against the `format-rows` middleware.
   (s/optional-key :format-rows?)
   s/Bool

   ;; disable the MBQL->native middleware. If you do this, the query will not work at all, so there are no cases where
   ;; you should set this yourself. This is only used by the `qp/query->preprocessed` function to get the fully
   ;; pre-processed query without attempting to convert it to native.
   (s/optional-key :disable-mbql->native?)
   s/Bool

   ;; Userland queries are ones run as a result of an API call, Pulse, MetaBot query, or the like. Special handling is
   ;; done in the `process-userland-query` middleware for such queries -- results are returned in a slightly different
   ;; format, and QueryExecution entries are normally saved, unless you pass `:no-save` as the option.
   (s/optional-key :userland-query?)
   (s/maybe s/Bool)

   ;; Whether to add some default `max-results` and `max-results-bare-rows` constraints. By default, none are added,
   ;; although the functions that ultimately power most API endpoints tend to set this to `true`. See
   ;; `add-constraints` middleware for more details.
   (s/optional-key :add-default-userland-constraints?)
   (s/maybe s/Bool)

   ;; other middleware options might be used somewhere, but I don't know about them. Add them if you come across them
   ;; for documentation purposes
   s/Keyword
   s/Any})


;;; ------------------------------------------------------ Info ------------------------------------------------------

;; This stuff is used for informational purposes, primarily to record QueryExecution entries when a query is run. Pass
;; them along if applicable when writing code that creates queries, but when working on middleware and the like you
;; can most likely ignore this stuff entirely.

(def Context
  "Schema for `info.context`; used for informational purposes to record how a query was executed. Must be one of the
  keywords enumerated below."
  (s/enum :ad-hoc
          :csv-download
          :dashboard
          :embedded-dashboard
          :embedded-question
          :json-download
          :map-tiles
          :metabot
          :public-dashboard
          :public-question
          :pulse
          :question
          :xlsx-download))

;; TODO - this schema is somewhat misleading because if you use a function like
;; `qp/process-query-and-save-with-max-results-constraints!` some of these keys (e.g. `:context`) are in fact required
(def Info
  "Schema for query `:info` dictionary, which is used for informational purposes to record information about how a query
  was executed in QueryExecution and other places. It is considered bad form for middleware to change its behavior
  based on this information, don't do it!

  Every key is optional and may be `nil`."
  {;; These keys are nice to pass in if you're running queries on the backend and you know these values. They aren't
   ;; used for permissions checking or anything like that so don't try to be sneaky
   (s/optional-key :context)      (s/maybe Context)
   (s/optional-key :executed-by)  (s/maybe su/IntGreaterThanZero)
   (s/optional-key :card-id)      (s/maybe su/IntGreaterThanZero)
   (s/optional-key :dashboard-id) (s/maybe su/IntGreaterThanZero)
   (s/optional-key :pulse-id)     (s/maybe su/IntGreaterThanZero)
   (s/optional-key :nested?)      (s/maybe s/Bool)
   ;; `:query-hash` gets added automatically by `process-query-and-save-execution!`, so don't try passing
   ;; it in yourself. In fact, I would like this a lot better if we could take this key out of `:info` entirely
   ;; and have the code that saves QueryExecutions figure out its value when it goes to save them.
   ;;
   ;; `(Class/forName "[B")` is the JVM class for a primitive byte array.
   (s/optional-key :query-hash)   (s/maybe (Class/forName "[B"))})


;;; --------------------------------------------- Metabase [Outer] Query ---------------------------------------------

(def ^Integer saved-questions-virtual-database-id
  "The ID used to signify that a database is 'virtual' rather than physical.

   A fake integer ID is used so as to minimize the number of changes that need to be made on the frontend -- by using
   something that looks like an ordinary integer ID, *nothing* need change there, and the frontend can query against
   this 'database' none the wiser. (This integer ID is negative, which means it will never conflict with a *real*
   database ID.)

   This ID acts as a sort of flag. The relevant places in the middleware can check whether the DB we're querying is
   this 'virtual' database and take the appropriate actions."
  -1337)
;; To the reader: yes, this seems sort of hacky, but one of the goals of the Nested Query Initiative™ was to minimize
;; if not completely eliminate any changes to the frontend. After experimenting with several possible ways to do this,
;; this implementation seemed simplest and best met the goal. Luckily this is the only place this "magic number" is
;; defined and the entire frontend can remain blissfully unaware of its value.

(def DatabaseID
  "Schema for a valid `:database` ID, in the top-level 'outer' query. Either a positive integer (referring to an
  actual Database), or `saved-questions-virtual-database-id`, which is a placeholder used for queries using the
  `:source-table \"card__id\"` shorthand for a source query resolved by middleware (since clients might not know the
  actual DB for that source query)."
  (s/cond-pre (s/eq saved-questions-virtual-database-id) su/IntGreaterThanZero))

(def Query
  "Schema for an [outer] query, e.g. the sort of thing you'd pass to the query processor or save in
  `Card.dataset_query`. On top of the per-key schemas, three constraints are applied in order: exactly one of
  `:native`/`:query` is present, the one present agrees with `:type`, and there is no top-level `:source-metadata`."
  (let [outer-query
        {:database                         DatabaseID
         ;; Type of query. `:query` = MBQL; `:native` = native. TODO - consider normalizing `:query` to `:mbql`
         :type                             (s/enum :query :native)
         (s/optional-key :native)          NativeQuery
         (s/optional-key :query)           MBQLQuery
         (s/optional-key :parameters)      [Parameter]
         ;;
         ;; OPTIONS
         ;;
         ;; These keys tweak the behavior of the Query Processor.
         ;; TODO - can we combine these all into a single `:options` map?
         ;;
         (s/optional-key :settings)        (s/maybe Settings)
         (s/optional-key :constraints)     (s/maybe Constraints)
         (s/optional-key :middleware)      (s/maybe MiddlewareOptions)
         ;;
         ;; INFO
         ;;
         ;; Used when recording info about this run in the QueryExecution log; things like the context the query was
         ;; run in and the User who ran it
         (s/optional-key :info)            (s/maybe Info)
         ;;
         ;; Other various keys get stuck in the query dictionary at some point or another by various pieces of QP
         ;; middleware to record bits of state. Everyone else can ignore them.
         s/Keyword                         s/Any}

        ;; exactly one of `:native` / `:query` has a truthy value
        native-xor-mbql?
        (fn [query]
          (not= (boolean (:native query))
                (boolean (:query query))))

        ;; the inner query that goes with the declared `:type` is actually there
        inner-query-matches-type?
        (fn [query]
          (core/case (:type query)
            :native (:native query)
            :query  (:query query)))

        ;; `:source-metadata` is added to queries when `card__id` source queries are resolved. It contains info about
        ;; the columns in the source query.
        ;;
        ;; Where this is added was changed in Metabase 0.33.0 -- previously, when `card__id` source queries were
        ;; resolved, the middleware would add `:source-metadata` to the top-level; to support joins against source
        ;; queries, this has been changed so it is always added at the same level the resolved `:source-query` is
        ;; added.
        ;;
        ;; This should automatically be fixed by `normalize`; if we encounter it, it means some middleware is not
        ;; functioning properly
        no-top-level-source-metadata?
        (fn [query]
          (core/not (:source-metadata query)))]
    ;; wrap the map schema in each constraint in turn, innermost first
    (reduce
     (fn [schema [pred message]]
       (s/constrained schema pred message))
     outer-query
     [[native-xor-mbql?
       "Query must specify either `:native` or `:query`, but not both."]
      [inner-query-matches-type?
       "Native queries must specify `:native`; MBQL queries must specify `:query`."]
      [no-top-level-source-metadata?
       "`:source-metadata` should be added in the same level as `:source-query` (i.e., the 'inner' MBQL query.)"]])))


;;; --------------------------------------------------- Validators ---------------------------------------------------

(def ^{:arglists '([query])} validate-query
  "Compiled schema validator for an [outer] Metabase query. (Pre-compiling a validator is more efficient; use this
  instead of calling `(s/validate Query query)` or similar.)"
  (s/validator Query))
